Latent profile Analysis

Latent profile Analysis (LPA) is a type of latent variable model that can be use to identify latent classes or mixtures in a dataset, based on a set of continuous input variables (Gibson, 1959; Oberski, 2016).

pacman::p_load(dplyr, summarytools, car, mclust, tidyLPA, reshape2, tidyverse, plotly, semPlot, semTools, lavaan, texreg, lme4)
load("input/data/data.RData")

Confianza interpersonal

Estimación utilizando la media de las cinco olas por cada indicador

conf <- data %>% 
  dplyr::select(c02_rec_w01, c02_rec_w02, c02_rec_w03, c02_rec_w04, c02_rec_w05, 
                c03_rec_w01, c03_rec_w02, c03_rec_w03, c03_rec_w04, c03_rec_w05) %>% 
  rowwise() %>%
  mutate(conf1 = mean(c(c02_rec_w01, c02_rec_w02, c02_rec_w03, c02_rec_w04, c02_rec_w05), na.rm=T),
         conf2 = mean(c(c03_rec_w01, c03_rec_w02, c03_rec_w03, c03_rec_w04, c03_rec_w05), na.rm=T))

conf <- conf %>% dplyr::select(conf1, conf2)


conf %>% dplyr::select(conf1, conf2)  %>% sjlabelled::as_label(.)  %>%  dfSummary(, graph.col = FALSE)
## Data Frame Summary  
## conf  
## Dimensions: 1491 x 2  
## Duplicates: 1371  
## 
## ----------------------------------------------------------------------------------
## No   Variable    Stats / Values          Freqs (% of Valid)   Valid      Missing  
## ---- ----------- ----------------------- -------------------- ---------- ---------
## 1    conf1       Mean (sd) : 1.2 (0.4)   14 distinct values   1491       0        
##      [numeric]   min < med < max:                             (100.0%)   (0.0%)   
##                  1 < 1 < 3                                                        
##                  IQR (CV) : 0.4 (0.3)                                             
## 
## 2    conf2       Mean (sd) : 1.4 (0.5)   17 distinct values   1491       0        
##      [numeric]   min < med < max:                             (100.0%)   (0.0%)   
##                  1 < 1.4 < 3                                                      
##                  IQR (CV) : 0.8 (0.3)                                             
## ----------------------------------------------------------------------------------
BIC <- mclustBIC(conf)
plot(BIC)

summary(BIC)
## Best BIC values:
##              EEV,9        EEV,8      EEV,6
## BIC      -1028.136 -1034.279667 -1570.4444
## BIC diff     0.000    -6.143846  -542.3086
mod1 <- Mclust(conf, modelNames = "EEV", G = 3, x = BIC)
summary(mod1)
## ---------------------------------------------------- 
## Gaussian finite mixture model fitted by EM algorithm 
## ---------------------------------------------------- 
## 
## Mclust EEV (ellipsoidal, equal volume and shape) model with 3 components: 
## 
##  log-likelihood    n df       BIC       ICL
##       -1078.809 1491 13 -2252.612 -2496.649
## 
## Clustering table:
##    1    2    3 
##  121  101 1269
ICL <- mclustICL(conf)
plot(ICL)

summary(ICL)
## Best ICL values:
##              EEV,8      EEV,9      EEV,5
## ICL      -2002.922 -2157.6490 -2323.6377
## ICL diff     0.000  -154.7275  -320.7161

Visualización

means <- data.frame(mod1$parameters$mean, stringsAsFactors = FALSE) %>%
  rownames_to_column() %>%
  rename(Interest = rowname) %>%
  melt(id.vars = "Interest", variable.name = "Profile", value.name = "Mean") %>%
  mutate(Mean = round(Mean, 2))
p <- means %>%
  ggplot(aes(Interest, Mean, group = Profile, color = Profile)) +
  geom_point(size = 2.25) +
  geom_line(size = 1.25) +
  labs(x = NULL, y = "Standardized mean") +
  theme_bw(base_size = 14) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "top")
ggplotly(p, tooltip = c("Interest", "Mean")) %>%
  layout(legend = list(orientation = "h", y = 1.2))

Diversidad

conf <- data %>% 
  dplyr::select(c02_rec_w01, c02_rec_w02, c02_rec_w03, c02_rec_w04, c02_rec_w05, 
                c03_rec_w01, c03_rec_w02, c03_rec_w03, c03_rec_w04, c03_rec_w05) %>% 
  rowwise() %>%
  mutate(conf1 = mean(c(c02_rec_w01, c02_rec_w02, c02_rec_w03, c02_rec_w04, c02_rec_w05), na.rm=T),
         conf2 = mean(c(c03_rec_w01, c03_rec_w02, c03_rec_w03, c03_rec_w04, c03_rec_w05), na.rm=T))

conf <- conf %>% dplyr::select(conf1, conf2)


conf %>% dplyr::select(conf1, conf2)  %>% sjlabelled::as_label(.)  %>%  dfSummary(, graph.col = FALSE)
## Data Frame Summary  
## conf  
## Dimensions: 1491 x 2  
## Duplicates: 1371  
## 
## ----------------------------------------------------------------------------------
## No   Variable    Stats / Values          Freqs (% of Valid)   Valid      Missing  
## ---- ----------- ----------------------- -------------------- ---------- ---------
## 1    conf1       Mean (sd) : 1.2 (0.4)   14 distinct values   1491       0        
##      [numeric]   min < med < max:                             (100.0%)   (0.0%)   
##                  1 < 1 < 3                                                        
##                  IQR (CV) : 0.4 (0.3)                                             
## 
## 2    conf2       Mean (sd) : 1.4 (0.5)   17 distinct values   1491       0        
##      [numeric]   min < med < max:                             (100.0%)   (0.0%)   
##                  1 < 1.4 < 3                                                      
##                  IQR (CV) : 0.8 (0.3)                                             
## ----------------------------------------------------------------------------------
diversidad <- data %>% 
  dplyr::select(c06_04_w01, c06_04_w03, 
                c06_05_w01, c06_05_w03,
                c06_06_w01, c06_06_w03) %>% 
  rowwise() %>%
  mutate(div1 = mean(c(c06_04_w01, c06_04_w03), na.rm=T),
         div2 = mean(c(c06_05_w01, c06_05_w03), na.rm=T),
         div3 = mean(c(c06_06_w01, c06_06_w03), na.rm=T))

diversidad <- diversidad %>% dplyr::select(div1, div2, div3) %>% na.omit()

diversidad %>% select(div1, div2, div3)  %>% sjlabelled::as_label(.)  %>%  summarytools::dfSummary(, graph.col = FALSE)
## Data Frame Summary  
## diversidad  
## Dimensions: 1476 x 3  
## Duplicates: 1139  
## 
## ----------------------------------------------------------------------------------
## No   Variable    Stats / Values          Freqs (% of Valid)   Valid      Missing  
## ---- ----------- ----------------------- -------------------- ---------- ---------
## 1    div1        Mean (sd) : 2.7 (1.1)   1.00 : 182 (12.3%)   1476       0        
##      [numeric]   min < med < max:        1.50 : 113 ( 7.7%)   (100.0%)   (0.0%)   
##                  1 < 3 < 5               2.00 : 231 (15.7%)                       
##                  IQR (CV) : 1.5 (0.4)    2.50 : 204 (13.8%)                       
##                                          3.00 : 254 (17.2%)                       
##                                          3.50 : 200 (13.6%)                       
##                                          4.00 : 198 (13.4%)                       
##                                          4.50 :  60 ( 4.1%)                       
##                                          5.00 :  34 ( 2.3%)                       
## 
## 2    div2        Mean (sd) : 3 (1)       1.00 : 102 ( 6.9%)   1476       0        
##      [numeric]   min < med < max:        1.50 :  71 ( 4.8%)   (100.0%)   (0.0%)   
##                  1 < 3 < 5               2.00 : 189 (12.8%)                       
##                  IQR (CV) : 1.5 (0.3)    2.50 : 194 (13.1%)                       
##                                          3.00 : 309 (20.9%)                       
##                                          3.50 : 239 (16.2%)                       
##                                          4.00 : 227 (15.4%)                       
##                                          4.50 :  95 ( 6.4%)                       
##                                          5.00 :  50 ( 3.4%)                       
## 
## 3    div3        Mean (sd) : 2.6 (1)     1.00 : 190 (12.9%)   1476       0        
##      [numeric]   min < med < max:        1.50 : 136 ( 9.2%)   (100.0%)   (0.0%)   
##                  1 < 2.5 < 5             2.00 : 270 (18.3%)                       
##                  IQR (CV) : 1 (0.4)      2.50 : 211 (14.3%)                       
##                                          3.00 : 303 (20.5%)                       
##                                          3.50 : 166 (11.2%)                       
##                                          4.00 : 147 (10.0%)                       
##                                          4.50 :  41 ( 2.8%)                       
##                                          5.00 :  12 ( 0.8%)                       
## ----------------------------------------------------------------------------------
BIC <- mclustBIC(diversidad)
plot(BIC)

summary(BIC)
## Best BIC values:
##              EEV,9      EEV,8      VVE,2
## BIC      -9730.906 -10515.874 -10745.542
## BIC diff     0.000   -784.968  -1014.636
mod1 <- Mclust(diversidad, modelNames = "EEV", G = 3, x = BIC)
summary(mod1)
## ---------------------------------------------------- 
## Gaussian finite mixture model fitted by EM algorithm 
## ---------------------------------------------------- 
## 
## Mclust EEV (ellipsoidal, equal volume and shape) model with 3 components: 
## 
##  log-likelihood    n df       BIC       ICL
##        -5433.47 1476 23 -11034.77 -11816.62
## 
## Clustering table:
##   1   2   3 
## 138 686 652
ICL <- mclustICL(diversidad)
plot(ICL)

summary(ICL)
## Best ICL values:
##             EEV,9     VVV,2      EEE,1
## ICL      -10121.5 -11234.02 -11250.735
## ICL diff      0.0  -1112.52  -1129.238

Visualización

means <- data.frame(mod1$parameters$mean, stringsAsFactors = FALSE) %>%
  rownames_to_column() %>%
  rename(Interest = rowname) %>%
  melt(id.vars = "Interest", variable.name = "Profile", value.name = "Mean") %>%
  mutate(Mean = round(Mean, 2))
p <- means %>%
  ggplot(aes(Interest, Mean, group = Profile, color = Profile)) +
  geom_point(size = 2.25) +
  geom_line(size = 1.25) +
  labs(x = NULL, y = "Standardized mean") +
  theme_bw(base_size = 14) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "top")
ggplotly(p, tooltip = c("Interest", "Mean")) %>%
  layout(legend = list(orientation = "h", y = 1.2))

Reestimación utilizando la media del indicador en cada ola (o puntajes factoriales)

data <- data %>%
  rowwise() %>%
  mutate(conf_interpersonal_w01 =
           mean(c(c02_rec_w01, c03_rec_w01), na.rm = T))
data <- data %>%
  rowwise() %>%
  mutate(conf_interpersonal_w02 =
           mean(c(c02_rec_w02, c03_rec_w02), na.rm = T))
data <- data %>%
  rowwise() %>%
  mutate(conf_interpersonal_w03 =
           mean(c(c02_rec_w03, c03_rec_w03), na.rm = T))
data <- data %>%
  rowwise() %>%
  mutate(conf_interpersonal_w04 =
           mean(c(c02_rec_w04, c03_rec_w04), na.rm = T))
data <- data %>%
  rowwise() %>%
  mutate(conf_interpersonal_w05 =
           mean(c(c02_rec_w05, c03_rec_w05), na.rm = T))

conf <- data %>% dplyr::select(conf_interpersonal_w01, conf_interpersonal_w02, conf_interpersonal_w03, conf_interpersonal_w04, conf_interpersonal_w05) %>% na.omit()
BIC <- mclustBIC(conf)
plot(BIC)

summary(BIC)
## Best BIC values:
##              EEV,7     EEV,6     EEI,5
## BIC      -5720.825 -6995.977 -7659.932
## BIC diff     0.000 -1275.151 -1939.107
mod1 <- Mclust(conf, modelNames = "EEV", G = 3, x = BIC)
summary(mod1)
## ---------------------------------------------------- 
## Gaussian finite mixture model fitted by EM algorithm 
## ---------------------------------------------------- 
## 
## Mclust EEV (ellipsoidal, equal volume and shape) model with 3 components: 
## 
##  log-likelihood    n df       BIC       ICL
##       -4584.565 1462 52 -9548.082 -10224.57
## 
## Clustering table:
##   1   2   3 
## 180 430 852
ICL <- mclustICL(conf)
plot(ICL)

summary(ICL)
## Best ICL values:
##              EEV,7     EEI,5     EEV,6
## ICL      -6461.459 -7744.000 -7926.921
## ICL diff     0.000 -1282.541 -1465.461

Visualización

means <- data.frame(mod1$parameters$mean, stringsAsFactors = FALSE) %>%
  rownames_to_column() %>%
  rename(Interest = rowname) %>%
  melt(id.vars = "Interest", variable.name = "Profile", value.name = "Mean") %>%
  mutate(Mean = round(Mean, 2))
p <- means %>%
  ggplot(aes(Interest, Mean, group = Profile, color = Profile)) +
  geom_point(size = 2.25) +
  geom_line(size = 1.25) +
  labs(x = NULL, y = "Standardized mean") +
  theme_bw(base_size = 14) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "top")
ggplotly(p, tooltip = c("Interest", "Mean")) %>%
  layout(legend = list(orientation = "h", y = 1.2))

Diversidad

cfa_1 <- '
diversidad_w01 =~ c06_04_w01 + c06_05_w01 + c06_06_w01
'
cfa_2 <- '
diversidad_w03 =~ c06_04_w03 + c06_05_w03 + c06_06_w03
'

fit_1 <- cfa(cfa_1,data=data,missing="ML",estimator="MLR")
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING: some cases are empty and will be ignored:
##   219 266 392 437 497 499 504 519 587 606 636 665 675 725 803 804 808 840 1464 1473 1481
fit_2 <- cfa(cfa_2,data=data,missing="ML",estimator="MLR")
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING: some cases are empty and will be ignored:
##   314 328 418 495 497 524 962 1169 1187 1201 1455 1472
p1 <- predict(fit_1)
p2 <- predict(fit_2)

scores_diversidad = as.data.frame(p1)
scores_diversidad2 = as.data.frame(p2)
# Merge with factor scores
data = as.data.frame(cbind(data, scores_diversidad, scores_diversidad2))

diversidad <- data %>% select(diversidad_w01, diversidad_w03) %>% na.omit()

# Check
diversidad %>% dplyr::select(diversidad_w01, diversidad_w03)  %>% sjlabelled::as_label(.)  %>%  dfSummary(, graph.col = FALSE)
## Data Frame Summary  
## diversidad  
## Dimensions: 1459 x 2  
## Duplicates: 535  
## 
## -----------------------------------------------------------------------------------------
## No   Variable         Stats / Values           Freqs (% of Valid)    Valid      Missing  
## ---- ---------------- ------------------------ --------------------- ---------- ---------
## 1    diversidad_w01   Mean (sd) : 0 (0.8)      154 distinct values   1459       0        
##      [numeric]        min < med < max:                               (100.0%)   (0.0%)   
##                       -1.4 < 0.1 < 1.8                                                   
##                       IQR (CV) : 1.2 (615.4)                                             
## 
## 2    diversidad_w03   Mean (sd) : 0 (0.8)      139 distinct values   1459       0        
##      [numeric]        min < med < max:                               (100.0%)   (0.0%)   
##                       -1.4 < 0.1 < 1.8                                                   
##                       IQR (CV) : 1.2 (156.5)                                             
## -----------------------------------------------------------------------------------------
BIC <- mclustBIC(diversidad)
plot(BIC)

summary(BIC)
## Best BIC values:
##             EVV,8       EVI,8       EVI,9
## BIC      -6331.28 -6360.32808 -6361.32995
## BIC diff     0.00   -29.04825   -30.05011
mod1 <- Mclust(diversidad, modelNames = "EVI", G = 3, x = BIC)
summary(mod1)
## ---------------------------------------------------- 
## Gaussian finite mixture model fitted by EM algorithm 
## ---------------------------------------------------- 
## 
## Mclust EVI (diagonal, equal volume, varying shape) model with 3 components: 
## 
##  log-likelihood    n df       BIC       ICL
##       -3322.609 1459 12 -6732.644 -7439.059
## 
## Clustering table:
##   1   2   3 
## 388 564 507
ICL <- mclustICL(diversidad)
plot(ICL)

summary(ICL)
## Best ICL values:
##             EVE,6       EVV,8       EEE,1
## ICL      -6692.18 -6709.33934 -6787.78941
## ICL diff     0.00   -17.15973   -95.60981

Visualización

means <- data.frame(mod1$parameters$mean, stringsAsFactors = FALSE) %>%
  rownames_to_column() %>%
  rename(Interest = rowname) %>%
  melt(id.vars = "Interest", variable.name = "Profile", value.name = "Mean") %>%
  mutate(Mean = round(Mean, 2))
p <- means %>%
  ggplot(aes(Interest, Mean, group = Profile, color = Profile)) +
  geom_point(size = 2.25) +
  geom_line(size = 1.25) +
  labs(x = NULL, y = "Standardized mean") +
  theme_bw(base_size = 14) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "top")
ggplotly(p, tooltip = c("Interest", "Mean")) %>%
  layout(legend = list(orientation = "h", y = 1.2))